{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Ledoit-Wolf vs OAS estimation\n\n\nThe usual covariance maximum likelihood estimate can be regularized\nusing shrinkage. Ledoit and Wolf proposed a close formula to compute\nthe asymptotically optimal shrinkage parameter (minimizing a MSE\ncriterion), yielding the Ledoit-Wolf covariance estimate.\n\nChen et al. proposed an improvement of the Ledoit-Wolf shrinkage\nparameter, the OAS coefficient, whose convergence is significantly\nbetter under the assumption that the data are Gaussian.\n\nThis example, inspired from Chen's publication [1], shows a comparison\nof the estimated MSE of the LW and OAS methods, using Gaussian\ndistributed data.\n\n[1] \"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy.linalg import toeplitz, cholesky\n\nfrom sklearn.covariance import LedoitWolf, OAS\n\nnp.random.seed(0)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "n_features = 100\n# simulation covariance matrix (AR(1) process)\nr = 0.1\nreal_cov = toeplitz(r ** np.arange(n_features))\ncoloring_matrix = cholesky(real_cov)\n\nn_samples_range = np.arange(6, 31, 1)\nrepeat = 100\nlw_mse = np.zeros((n_samples_range.size, repeat))\noa_mse = np.zeros((n_samples_range.size, repeat))\nlw_shrinkage = np.zeros((n_samples_range.size, repeat))\noa_shrinkage = np.zeros((n_samples_range.size, repeat))\nfor i, n_samples in enumerate(n_samples_range):\n    for j in range(repeat):\n        X = np.dot(\n            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)\n\n        lw = LedoitWolf(store_precision=False, assume_centered=True)\n        lw.fit(X)\n        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)\n        lw_shrinkage[i, j] = lw.shrinkage_\n\n        oa = OAS(store_precision=False, assume_centered=True)\n        oa.fit(X)\n        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)\n        oa_shrinkage[i, j] = oa.shrinkage_\n\n# plot MSE\nplt.subplot(2, 1, 1)\nplt.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),\n             label='Ledoit-Wolf', color='navy', lw=2)\nplt.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),\n             label='OAS', color='darkorange', lw=2)\nplt.ylabel(\"Squared error\")\nplt.legend(loc=\"upper right\")\nplt.title(\"Comparison of covariance estimators\")\nplt.xlim(5, 31)\n\n# plot shrinkage coefficient\nplt.subplot(2, 1, 2)\nplt.errorbar(n_samples_range, lw_shrinkage.mean(1), yerr=lw_shrinkage.std(1),\n             label='Ledoit-Wolf', color='navy', lw=2)\nplt.errorbar(n_samples_range, oa_shrinkage.mean(1), yerr=oa_shrinkage.std(1),\n             label='OAS', color='darkorange', lw=2)\nplt.xlabel(\"n_samples\")\nplt.ylabel(\"Shrinkage\")\nplt.legend(loc=\"lower right\")\nplt.ylim(plt.ylim()[0], 1. + (plt.ylim()[1] - plt.ylim()[0]) / 10.)\nplt.xlim(5, 31)\n\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}